library(dplyr)
# For Model fitting
library(lme4)
library(nlme)
library(purrr)
# For diagnostics
library(performance)
#library(see)
# For adding new columns
library(tibble)
library(MuMIn)# setwd for Rmarkdowm
knitr::opts_knit$set(root.dir = "~/Documents/projects/nutrients_and_water_effects_2022/stats/")# Load data
source("./scripts/code_join_data_full_dataset.R")# Load functions
## Models
source("./R/functions_models.R")
source("./R/function_nlme_validation_plots.R")Model Fitting
Models: Questions 1,2 and Mass fractions
Select performance and traits variables
data_for_models <-
data_for_models %>%
# Select variables for analysis
dplyr::select(!c(rgr_slope, d15n, above_biomass, below_biomass,
rmf, smf, lmf))\[response\sim treatment*fixer\ + initial\ height + random( 1|\ specie)\]
# Take response variables names
response_vars_q1_q2 <-
set_names(names(data_for_models)[c(5:14)])
response_vars_q1_q2 total_biomass agr rgr root_shoot_ratio
"total_biomass" "agr" "rgr" "root_shoot_ratio"
amax gs wue d13c
"amax" "gs" "wue" "d13c"
pnue Narea_g_m2
"pnue" "Narea_g_m2"
models_q1_q2 <- map(response_vars_q1_q2, ~ mixed_model_1(response = .x,
data = data_for_models))
names(models_q1_q2) [1] "total_biomass" "agr" "rgr" "root_shoot_ratio"
[5] "amax" "gs" "wue" "d13c"
[9] "pnue" "Narea_g_m2"
# Log models
## WUE log
model_q2_wue_log <- lmer(log(wue) ~ nfixer*treatment + init_height + (1|spcode),
data = data_for_models)
## PNUE log
model_q2_pnue_log <- lmer(log(pnue) ~ nfixer*treatment + init_height + (1|spcode),
data = data_for_models)
## Root-Shoot ratio
model_ratio_log <- lmer(log(root_shoot_ratio) ~ nfixer*treatment + init_height + (1|spcode),
data = data_for_models)
## Nitrogen log
model_nitrogen_log <- lmer(log(Narea_g_m2) ~ nfixer*treatment + init_height + (1|spcode),
data = data_for_models)
## Absolute growth rate
model_agr_log <- lmer(log(agr) ~ nfixer*treatment + init_height + (1|spcode),
data = data_for_models)# Append log models to model list Q1 Q2
log_models <- list(model_q2_wue_log, model_q2_pnue_log,
model_ratio_log, model_nitrogen_log,
model_agr_log)
names(log_models) <- c("wue_log", "pnue_log", "ratio_log", "nitrogen_log",
"agr_log")
models_q1_q2 <- append(log_models, models_q1_q2)
length(models_q1_q2)[1] 15
names(models_q1_q2) [1] "wue_log" "pnue_log" "ratio_log" "nitrogen_log"
[5] "agr_log" "total_biomass" "agr" "rgr"
[9] "root_shoot_ratio" "amax" "gs" "wue"
[13] "d13c" "pnue" "Narea_g_m2"
Model: Nodule count
- Chapter 9 Mixed models in ecology check glmmML package for count data
- GOOD ref https://www.dataquest.io/blog/tutorial-poisson-regression-in-r/
- #https://www.flutterbys.com.au/stats/tut/tut11.2a.html
# Load data
# This step was done like this because I am working with a subset of the data
# source cleaned data
source("./scripts/code_clean_data_nodules.R")
# Delete unused variables
data_nodules_cleaned <-
data_nodules_cleaned %>%
# add id to rownames for keep track of the rows
column_to_rownames("id") %>%
dplyr::select(spcode, treatment, everything())Previuosly I fitted non-log models and found out the log one is the best
log models
## After seeing the validation plots for the lmer_gaussian_log I decided to try
## to improve the log model using the nlme package
nlme_gaussian_log <- lme(log(number_of_root_nodulation) ~ treatment + init_height,
random = ~1|spcode,
data = data_nodules_cleaned)
nlme_nodule_log_weights <- lme(log(number_of_root_nodulation) ~ treatment + init_height,
random = ~1|spcode,
weights = varIdent(form = ~1|spcode),
data = data_nodules_cleaned)Compare models
# Check if the nlme_gaussian_log_weights is a better model
# Compare between nlme objects
anova(nlme_gaussian_log, nlme_nodule_log_weights) Model df AIC BIC logLik Test L.Ratio
nlme_gaussian_log 1 7 84.56385 96.89225 -35.28193
nlme_nodule_log_weights 2 9 62.79085 78.64165 -22.39542 1 vs 2 25.77301
p-value
nlme_gaussian_log
nlme_nodule_log_weights <.0001
model.sel(nlme_gaussian_log, nlme_nodule_log_weights)Model selection table
(Int) int_hgh trt weights df logLik AICc delta weight
nlme_nodule_log_weights 3.344 0.03767 + vrI(spc) 9 -22.395 67.5 0.00 1
nlme_gaussian_log 2.943 0.05571 + 7 -35.282 87.4 19.84 0
Abbreviations:
weights: vrI(spc) = 'varIdent(~1|spcode)'
Models ranked by AICc(x)
Random terms (all models):
1 | spcode
AIC(nlme_gaussian_log, nlme_nodule_log_weights) df AIC
nlme_gaussian_log 7 84.56385
nlme_nodule_log_weights 9 62.79085
# Model improved, check assumptions for nlme_gaussian_log_weightsModels: Question 3
\[performance\sim treatment:fixer:scaled(trait)\ + initial\ height + random( 1|\ specie)\]
Scale preditors aka traits
data_for_models_scaled <-
data_for_models %>%
# Narea_g_m2 not included since it has a high correlation with amax
mutate(across(c(4, 9:13), scale))Model RGR
lme_rgr_3way <- lme(rgr ~ treatment:nfixer:amax +
treatment:nfixer:gs +
treatment:nfixer:wue +
treatment:nfixer:pnue +
treatment:nfixer:d13c +
# Added
#treatment:nfixer:Narea_g_m2 +
init_height ,
random = ~1 | spcode,
data = data_for_models_scaled)Model AGR
lme_agr_3way <- lme(agr ~ treatment:nfixer:amax +
treatment:nfixer:gs +
treatment:nfixer:wue +
treatment:nfixer:pnue +
treatment:nfixer:d13c +
# Added
#treatment:nfixer:Narea_g_m2 +
init_height ,
random = ~1 | spcode,
data = data_for_models_scaled)Model Total Biomass
lme_total_biom_3way <- lme(total_biomass ~ treatment:nfixer:amax +
treatment:nfixer:gs +
treatment:nfixer:wue +
treatment:nfixer:pnue +
treatment:nfixer:d13c +
init_height,
random = ~1 | spcode,
data = data_for_models_scaled)Model shoot root ratios
lme_3way_root_shoot_ratio <- lme(root_shoot_ratio ~ treatment:nfixer:amax +
treatment:nfixer:gs +
treatment:nfixer:wue +
treatment:nfixer:pnue +
treatment:nfixer:d13c +
# Added
#treatment:nfixer:Narea_g_m2 +
init_height,
random = ~1 | spcode,
data = data_for_models_scaled)Model Validation
Validation of models for questions 1,2 and Mass fractions
Collinearity
map(models_q1_q2, check_collinearity)$wue_log
# Check for Multicollinearity
Low Correlation
Term VIF VIF 95% CI Increased SE Tolerance Tolerance 95% CI
nfixer 1.30 [1.13, 1.72] 1.14 0.77 [0.58, 0.89]
treatment 3.85 [2.94, 5.18] 1.96 0.26 [0.19, 0.34]
init_height 1.05 [1.00, 3.34] 1.02 0.96 [0.30, 1.00]
nfixer:treatment 4.57 [3.46, 6.18] 2.14 0.22 [0.16, 0.29]
$pnue_log
# Check for Multicollinearity
Low Correlation
Term VIF VIF 95% CI Increased SE Tolerance Tolerance 95% CI
nfixer 1.20 [1.06, 1.63] 1.09 0.84 [0.61, 0.94]
treatment 3.85 [2.94, 5.19] 1.96 0.26 [0.19, 0.34]
init_height 1.05 [1.00, 2.74] 1.03 0.95 [0.36, 1.00]
nfixer:treatment 4.31 [3.27, 5.82] 2.08 0.23 [0.17, 0.31]
$ratio_log
# Check for Multicollinearity
Low Correlation
Term VIF VIF 95% CI Increased SE Tolerance Tolerance 95% CI
nfixer 1.03 [1.00, 6.70] 1.02 0.97 [0.15, 1.00]
treatment 3.87 [2.96, 5.22] 1.97 0.26 [0.19, 0.34]
init_height 1.06 [1.00, 2.12] 1.03 0.94 [0.47, 1.00]
nfixer:treatment 3.92 [2.99, 5.29] 1.98 0.26 [0.19, 0.33]
$nitrogen_log
# Check for Multicollinearity
Low Correlation
Term VIF VIF 95% CI Increased SE Tolerance Tolerance 95% CI
nfixer 1.09 [1.01, 1.77] 1.04 0.92 [0.56, 0.99]
treatment 3.87 [2.95, 5.21] 1.97 0.26 [0.19, 0.34]
init_height 1.06 [1.00, 2.30] 1.03 0.94 [0.43, 1.00]
nfixer:treatment 4.05 [3.09, 5.47] 2.01 0.25 [0.18, 0.32]
$agr_log
# Check for Multicollinearity
Low Correlation
Term VIF VIF 95% CI Increased SE Tolerance Tolerance 95% CI
nfixer 1.06 [1.00, 2.23] 1.03 0.94 [0.45, 1.00]
treatment 3.87 [2.95, 5.21] 1.97 0.26 [0.19, 0.34]
init_height 1.06 [1.00, 2.20] 1.03 0.94 [0.45, 1.00]
nfixer:treatment 3.98 [3.04, 5.37] 2.00 0.25 [0.19, 0.33]
$total_biomass
# Check for Multicollinearity
Low Correlation
Term VIF VIF 95% CI Increased SE Tolerance Tolerance 95% CI
treatment 3.86 [2.95, 5.20] 1.97 0.26 [0.19, 0.34]
nfixer 1.12 [1.02, 1.66] 1.06 0.90 [0.60, 0.98]
init_height 1.06 [1.00, 2.40] 1.03 0.95 [0.42, 1.00]
treatment:nfixer 4.12 [3.13, 5.56] 2.03 0.24 [0.18, 0.32]
$agr
# Check for Multicollinearity
Low Correlation
Term VIF VIF 95% CI Increased SE Tolerance Tolerance 95% CI
treatment 3.87 [2.95, 5.21] 1.97 0.26 [0.19, 0.34]
nfixer 1.06 [1.00, 2.12] 1.03 0.94 [0.47, 1.00]
init_height 1.06 [1.00, 2.22] 1.03 0.94 [0.45, 1.00]
treatment:nfixer 3.99 [3.04, 5.39] 2.00 0.25 [0.19, 0.33]
$rgr
# Check for Multicollinearity
Low Correlation
Term VIF VIF 95% CI Increased SE Tolerance Tolerance 95% CI
treatment 3.87 [2.96, 5.22] 1.97 0.26 [0.19, 0.34]
nfixer 1.05 [1.00, 2.86] 1.02 0.95 [0.35, 1.00]
init_height 1.06 [1.00, 2.17] 1.03 0.94 [0.46, 1.00]
treatment:nfixer 3.96 [3.02, 5.34] 1.99 0.25 [0.19, 0.33]
$root_shoot_ratio
# Check for Multicollinearity
Low Correlation
Term VIF VIF 95% CI Increased SE Tolerance Tolerance 95% CI
treatment 3.87 [2.96, 5.22] 1.97 0.26 [0.19, 0.34]
nfixer 1.03 [1.00, 13.41] 1.01 0.97 [0.07, 1.00]
init_height 1.07 [1.00, 2.10] 1.03 0.94 [0.48, 1.00]
treatment:nfixer 3.91 [2.98, 5.27] 1.98 0.26 [0.19, 0.34]
$amax
# Check for Multicollinearity
Low Correlation
Term VIF VIF 95% CI Increased SE Tolerance Tolerance 95% CI
treatment 3.88 [2.96, 5.22] 1.97 0.26 [0.19, 0.34]
nfixer 1.03 [1.00, 20.89] 1.01 0.97 [0.05, 1.00]
init_height 1.07 [1.00, 2.10] 1.03 0.94 [0.48, 1.00]
treatment:nfixer 3.90 [2.98, 5.26] 1.98 0.26 [0.19, 0.34]
$gs
# Check for Multicollinearity
Low Correlation
Term VIF VIF 95% CI Increased SE Tolerance Tolerance 95% CI
treatment 3.83 [2.92, 5.15] 1.96 0.26 [0.19, 0.34]
nfixer 1.92 [1.56, 2.53] 1.39 0.52 [0.40, 0.64]
init_height 1.03 [1.00, 11.74] 1.01 0.97 [0.09, 1.00]
Moderate Correlation
Term VIF VIF 95% CI Increased SE Tolerance Tolerance 95% CI
treatment:nfixer 6.08 [4.55, 8.28] 2.47 0.16 [0.12, 0.22]
$wue
# Check for Multicollinearity
Low Correlation
Term VIF VIF 95% CI Increased SE Tolerance Tolerance 95% CI
treatment 3.85 [2.94, 5.19] 1.96 0.26 [0.19, 0.34]
nfixer 1.19 [1.06, 1.62] 1.09 0.84 [0.62, 0.95]
init_height 1.05 [1.00, 2.71] 1.03 0.95 [0.37, 1.00]
treatment:nfixer 4.29 [3.26, 5.80] 2.07 0.23 [0.17, 0.31]
$d13c
# Check for Multicollinearity
Low Correlation
Term VIF VIF 95% CI Increased SE Tolerance Tolerance 95% CI
treatment 3.85 [2.94, 5.19] 1.96 0.26 [0.19, 0.34]
nfixer 1.20 [1.06, 1.63] 1.09 0.84 [0.61, 0.94]
init_height 1.05 [1.00, 2.74] 1.03 0.95 [0.37, 1.00]
treatment:nfixer 4.31 [3.27, 5.82] 2.07 0.23 [0.17, 0.31]
$pnue
# Check for Multicollinearity
Low Correlation
Term VIF VIF 95% CI Increased SE Tolerance Tolerance 95% CI
treatment 3.85 [2.94, 5.18] 1.96 0.26 [0.19, 0.34]
nfixer 1.30 [1.12, 1.71] 1.14 0.77 [0.58, 0.89]
init_height 1.05 [1.00, 3.28] 1.02 0.96 [0.30, 1.00]
treatment:nfixer 4.55 [3.44, 6.15] 2.13 0.22 [0.16, 0.29]
$Narea_g_m2
# Check for Multicollinearity
Low Correlation
Term VIF VIF 95% CI Increased SE Tolerance Tolerance 95% CI
treatment 3.86 [2.95, 5.21] 1.97 0.26 [0.19, 0.34]
nfixer 1.10 [1.01, 1.70] 1.05 0.91 [0.59, 0.99]
init_height 1.06 [1.00, 2.35] 1.03 0.95 [0.43, 1.00]
treatment:nfixer 4.08 [3.11, 5.51] 2.02 0.24 [0.18, 0.32]
# No problems found here
map(models_q1_q2, check_model)$wue_log
$pnue_log
$ratio_log
$nitrogen_log
$agr_log
$total_biomass
$agr
$rgr
$root_shoot_ratio
$amax
$gs
$wue
$d13c
$pnue
$Narea_g_m2
Validation of nodule count models
Checking log nodule count model’s assumptions
Zuur et al pp 84:
“Note that these residuals still show heterogeneity, but this is now allowed (because the residual variation differs depending on the chosen variance structure and values of the variance covariate). Hence, these residuals are less useful for the model validation process.”
nlme_validation_plots(nlme_nodule_log_weights, data = data_nodules_cleaned,
group = "spcode", variables = c("treatment") )Validation of models for question 3
RGR
nlme_validation_plots(lme_rgr_3way, data = data_for_models_scaled, group = "spcode")[1] "No variable specified inthe variables argument"
AGR
nlme_validation_plots(lme_agr_3way, data = data_for_models_scaled, group = "spcode")[1] "No variable specified inthe variables argument"
Total biomass
nlme_validation_plots(lme_total_biom_3way, data = data_for_models_scaled,
group = "spcode")[1] "No variable specified inthe variables argument"
Root-shoot ratio
nlme_validation_plots(lme_3way_root_shoot_ratio, data = data_for_models_scaled,
group = "spcode")[1] "No variable specified inthe variables argument"
Save model lists as .RData
Models for Q1, Q2 and Mass fractions
models_q1_q2 <-
models_q1_q2 %>%
# Remove models that showed violation of the assumptions
purrr::list_modify("pnue" = NULL, "wue" = NULL,
"Narea_g_m2" = NULL, "root_shoot_ratio" = NULL)Nodule count
models_nodule_count <- list(nlme_gaussian_log, nlme_nodule_log_weights)
names(models_nodule_count) <- c("nlme_gaussian_log", "nlme_gaussian_log_weights")3-way models
models_3way_q3 <- list(lme_rgr_3way, lme_total_biom_3way,
lme_3way_root_shoot_ratio, lme_agr_3way)
names(models_3way_q3) <- c("lme_rgr_3way", "lme_total_biom_3way",
"lme_3way_root_shoot_ratio", "lme_agr_3way")saveRDS(models_q1_q2, file = "./processed_data/models_q1_q2.RData")
saveRDS(models_nodule_count, file = "./processed_data/models_nodule_count.RData")
saveRDS(models_3way_q3, file = "./processed_data/models_3way_q3.RData")